library(factoextra)
## Loading required package: ggplot2
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#######
# Load the raw table; the code below uses its `.rownames` column as row
# labels and treats the remaining columns as per-patient values.
d <- read.csv("dfall.csv", sep = ",")
# Drop the first two columns (the numbering column and, presumably, the
# `.rownames` column itself, which is re-attached as row names below --
# TODO confirm against the CSV layout).
all.data <- d[, -c(1, 2)]
rownames(all.data) <- d$.rownames
# Remove common function words. A logical mask is used instead of
# `-which(...)`: when none of the words is present, `-which()` evaluates to
# an empty index and would silently drop every row.
all.data <- all.data[
  !(d$.rownames %in% c("and", "with", "for", "when", "the")),
]
# Label the 37 remaining columns P1..P37 (errors if the column count differs).
colnames(all.data) <- paste0("P", 1:37)
#### Patient groups, bucketed by how long each patient lived (days).
#### The day ranges below mirror the plot titles used for each group.
# days 1~10
c1 <- paste0("P", c(12, 23, 13, 3, 5, 7, 21, 16, 17))
# days 11~20
c11 <- paste0("P", c(37, 31, 2, 11))
# days 21~30
c21 <- paste0("P", c(29, 6, 26, 10, 9))
# days 31~
c31 <- paste0("P", c(28, 19, 1, 18, 4))
# K-means clustering (k is fixed at 3 below; no elbow-method code is present here)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#' Cluster words on a group of patient columns and plot the clusters
#'
#' Selects the columns named in `x`, drops rows whose values sum to zero
#' across those columns, runs k-means on the remaining rows and returns an
#' interactive version of the factoextra cluster plot.
#'
#' @param x Character vector of column names to cluster on.
#' @param y Title passed to `fviz_cluster()` as `main`.
#' @param data Data frame with numeric columns; row names label the plotted
#'   points. Defaults to the global `all.data`.
#' @param centers Number of k-means clusters. Defaults to 3.
#' @return The object returned by `ggplotly()` for the `fviz_cluster()` plot.
vis <- function(x, y, data = all.data, centers = 3) {
  sel <- data[, x, drop = FALSE]
  # Filter on the row totals without binding them to the data. The previous
  # code appended a `sum` column and removed it with `[, -10]`, which only
  # worked for exactly nine selected columns; for any other group the total
  # stayed in the data and was clustered as if it were a patient.
  run <- sel[rowSums(sel) != 0, , drop = FALSE]
  # Fixed seed so the random starts of kmeans() give reproducible clusters.
  set.seed(20)
  km <- kmeans(run, centers = centers, nstart = 20)
  ggplotly(fviz_cluster(km, data = run, main = y))
}
# Build one interactive cluster plot per patient group; each title states
# the day range that the group covers.
gp1 <- vis(x = c1, y = "word cluster(day=1~10)")
gp2 <- vis(x = c11, y = "word cluster(day=11~20)")
gp3 <- vis(x = c21, y = "word cluster(day=21~30)")
gp4 <- vis(x = c31, y = "word cluster(day=31~)")

# Display the plots in group order (top-level auto-printing).
gp1
gp2
gp3
gp4